This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
source("tianfengRwrappers.R")
library(xgboost)
library(Matrix)
library(mclust)
library(tidyverse)
ds2 <- readRDS("ds2.rds")
Idents(ds2) <- ds2$seurat_clusters
Idents(ds1) <- ds1$seurat_clusters
Idents(ds0) <- ds0$seurat_clusters
ds2_data <- get_data_table(ds2, highvar = F, type = "data")
ds2_label <- as.numeric(as.character(Idents(ds2)))
index <- c(1:dim(ds2_data)[2]) %>% sample(ceiling(0.3*dim(ds2_data)[2]), replace = F, prob = NULL)
colnames(ds2_data) <- NULL
ds2_train_data <- list(data = t(as(ds2_data[,-index],"dgCMatrix")), label = ds2_label[-index])
ds2_test_data <- list(data = t(as(ds2_data[,index],"dgCMatrix")), label = ds2_label[index])
ds2_train <- xgb.DMatrix(data = ds2_train_data$data,label = ds2_train_data$label)
ds2_test <- xgb.DMatrix(data = ds2_test_data$data,label = ds2_test_data$label)
watchlist <- list(train = ds2_train, eval = ds2_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2))),
objective = "multi:softprob", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, ds2_train, nrounds = 100, watchlist, verbose = 0)
saveRDS(bst_model, "ds2_model.rds")
eval_loss <- bst_model[["evaluation_log"]][["eval_mlogloss"]]
plot_ly(data.frame(eval_loss), x = c(1:100), y = eval_loss) %>%
add_trace(type = "scatter", mode = "markers+lines",
marker = list(color = "black", line = list(color = "#1E90FFC7", width = 1)),
line = list(color = "#1E90FF80", width = 2)) %>%
layout(xaxis = list(title = "epoch"),yaxis = list(title = "eval_mlogloss"))
importance <- xgb.importance(colnames(ds2_train), model = bst_model)
head(importance)
xgb.ggplot.importance(head(importance,20),n_clusters = 1) + theme_bw()+theme(
axis.title.x = element_text(size = 15), axis.text.x = element_text(size = 12, colour = "black"),
axis.title.y = element_text(size = 15), axis.text.y = element_text(size = 12, colour = "black"),
legend.text = element_text(size = 20), legend.title = element_blank(), panel.grid = element_blank())
Idents(ds1) <- ds1$seurat_clusters
temp <- get_data_table(ds1, highvar = F, type = "data")
ds1_data <- matrix(data=0, nrow = length(rownames(ds2_data)), ncol = length(colnames(temp)),
byrow = FALSE, dimnames = list(rownames(ds2_data),colnames(temp)))
for(i in intersect(rownames(ds2_data), rownames(temp))){
ds1_data[i,] <- temp[i,]
}
rm(temp)
ds1_label <- as.numeric(as.character(Idents(ds1)))
colnames(ds1_data) <- NULL
ds1_test_data <- list(data = t(as(ds1_data,"dgCMatrix")), label = ds1_label)
ds1_test <- xgb.DMatrix(data = ds1_test_data$data,label = ds1_test_data$label)
#预测结果
predict_ds1_test <- predict(bst_model, newdata = ds1_test)
predict_prop_ds1 <- matrix(data=predict_ds1_test, nrow = length(levels(Idents(ds2))),
ncol = ncol(ds1), byrow = FALSE,
dimnames = list(levels(Idents(ds2)),colnames(ds1)))
## 得到分群结果
ds1_res <- apply(predict_prop_ds1,2,func,rownames(predict_prop_ds1))
adjustedRandIndex(ds1_res, ds1_test_data$label)
Idents(ds1) <- factor(ds1_res,levels = c(0:4))
umapplot(ds1)
ds1$supclustering <- Idents(ds1) #保存监督聚类结果
embedding <- FetchData(object = ds1, vars = c("UMAP_1", "UMAP_2"))
embedding <- cbind(embedding, t(predict_prop_ds1))
ggobj <- ggplot() +
geom_point(data = embedding[embedding$`0`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `0`), shape=16, size = 3, alpha=0.5) +
scale_color_gradient('0', low = "#FFFFFF00", high = "#6dc0a6") +
new_scale("color") +
geom_point(data = embedding[embedding$`1`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `1`),shape=16, size = 3, alpha=0.5) +
scale_color_gradient('1', low = "#FFFFFF00", high = "#e2b398") +
new_scale("color") +
geom_point(data = embedding[embedding$`2`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `2`),shape=16, size = 3, alpha=0.5) +
scale_color_gradient('2', low = "#FFFFFF00", high = "#e2a2ca") +
new_scale("color") +
geom_point(data = embedding[embedding$`3`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `3`),shape=16, size = 3, alpha=0.5) +
scale_color_gradient('3', low = "#FFFFFF00", high = "#d1eba8") +
new_scale("color") +
geom_point(data = embedding[embedding$`4`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `4`),shape=16, size = 3, alpha=0.5) +
scale_color_gradient('4', low = "#FFFFFF00", high = "#b1d6fb") +
new_scale("color") +
xlab("UMAP 1") + ylab("UMAP 2") +
theme(axis.line = element_line(arrow = arrow(length = unit(0.2, "cm")))) +
scale_y_continuous(breaks = NULL) +
scale_x_continuous(breaks = NULL) +
theme(panel.background = element_blank(), panel.grid = element_blank(), legend.position = "bottom")
ggsave("pre_ds1_umap.svg",device = svg,plot = ggobj,height = 10,width = 10)
Idents(ds0) <- ds0$seurat_clusters
temp <- get_data_table(ds0, highvar = F, type = "data")
ds0_data <- matrix(data=0, nrow = length(rownames(ds2_data)), ncol = length(colnames(temp)),
byrow = FALSE, dimnames = list(rownames(ds2_data),colnames(temp)))
for(i in intersect(rownames(ds2_data), rownames(temp))){
ds0_data[i,] <- temp[i,]
}
rm(temp)
ds0_label <- as.numeric(as.character(Idents(ds0)))
colnames(ds0_data) <- NULL
ds0_test_data <- list(data = t(as(ds0_data,"dgCMatrix")), label = ds0_label)
ds0_test <- xgb.DMatrix(data = ds0_test_data$data,label = ds0_test_data$label)
#预测结果
predict_ds0_test <- predict(bst_model, newdata = ds0_test)
predict_prop_ds0 <- matrix(data=predict_ds0_test, nrow = length(levels(Idents(ds2))),
ncol = ncol(ds0), byrow = FALSE,
dimnames = list(levels(Idents(ds2)),colnames(ds0)))
## 得到分群结果
ds0_res <- apply(predict_prop_ds0,2,func,rownames(predict_prop_ds0))
adjustedRandIndex(ds0_res, ds0_test_data$label)
Idents(ds0) <- factor(ds0_res,levels = c(0:4))
umapplot(ds0)
ds0$supclustering <- Idents(ds0) #保存监督聚类结果
embedding <- FetchData(object = ds0, vars = c("UMAP_1", "UMAP_2"))
embedding <- cbind(embedding, t(predict_prop_ds0))
ggobj <- ggplot() +
geom_point(data = embedding[embedding$`0`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `0`), shape=16, size = 3, alpha=0.5) +
scale_color_gradient('0', low = "#FFFFFF00", high = "#6dc0a6") +
new_scale("color") +
geom_point(data = embedding[embedding$`1`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `1`),shape=16, size = 3, alpha=0.5) +
scale_color_gradient('1', low = "#FFFFFF00", high = "#e2b398") +
new_scale("color") +
geom_point(data = embedding[embedding$`2`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `2`),shape=16, size = 3, alpha=0.5) +
scale_color_gradient('2', low = "#FFFFFF00", high = "#e2a2ca") +
new_scale("color") +
geom_point(data = embedding[embedding$`3`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `3`),shape=16, size = 3, alpha=0.5) +
scale_color_gradient('3', low = "#FFFFFF00", high = "#d1eba8") +
new_scale("color") +
geom_point(data = embedding[embedding$`4`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `4`),shape=16, size = 3, alpha=0.5) +
scale_color_gradient('4', low = "#FFFFFF00", high = "#b1d6fb") +
new_scale("color") +
xlab("UMAP 1") + ylab("UMAP 2") +
theme(axis.line = element_line(arrow = arrow(length = unit(0.2, "cm")))) +
scale_y_continuous(breaks = NULL) +
scale_x_continuous(breaks = NULL) +
theme(panel.background = element_blank(), panel.grid = element_blank(), legend.position = "bottom")
ggsave("pre_ds0_umap.svg",device = svg,plot = ggobj,height = 10,width = 10)
Idents(ds2_PA) <- ds2_PA$seurat_clusters
selected_features <- read.csv("./datatable/selected_features.csv", stringsAsFactors = F)
selected_features <- selected_features$x
PA_data <- get_data_table(ds2_PA, highvar = F, type = "data")
PA_data <- PA_data[selected_features,]
PA_label <- as.numeric(as.character(Idents(ds2_PA)))
colnames(PA_data) <- NULL
PA_train_data <- list(data = t(as(PA_data,"dgCMatrix")), label = PA_label)
PA_train <- xgb.DMatrix(data = PA_train_data$data,label = PA_train_data$label)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2_PA))),
objective = "multi:softprob", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, PA_train, nrounds = 100, verbose = 0)
Idents(ds2_AC) <- ds2_AC$seurat_clusters
AC_data <- get_data_table(ds2_AC, highvar = F, type = "data")
AC_data <- AC_data[selected_features,]
AC_label <- as.numeric(as.character(Idents(ds2_AC)))
colnames(AC_data) <- NULL
AC_test_data <- list(data = t(as(AC_data,"dgCMatrix")), label = AC_label)
AC_test <- xgb.DMatrix(data = AC_test_data$data,label = AC_test_data$label)
#预测结果
predict_prop_AC <-predict(bst_model, newdata = AC_test) %>%
matrix(nrow = length(levels(Idents(ds2_PA))),
ncol = ncol(ds2_AC), byrow = FALSE,
dimnames = list(levels(Idents(ds2_PA)),colnames(ds2_AC)))
AC_res <- apply(predict_prop_AC,2,func,rownames(predict_prop_AC))
confuse_matrix1 <- table(AC_test_data$label, AC_res, dnn=c("true","pre"))
sankey_plot(confuse_matrix1,session = "PAtoAC")
Idents(ds2_AC) <- factor(AC_res,levels = c(0:2))
umapplot(ds2_AC)
embedding <- FetchData(object = ds2_AC, vars = c("UMAP_1", "UMAP_2"))
embedding <- cbind(embedding, t(predict_prop_AC))
ggobj <- ggplot() +
geom_point(data = embedding[embedding$`0`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `0`), shape=16, size = 2, alpha=0.5) +
scale_color_gradient('0', low = "#FFFFFF00", high = "#6dc0a6") +
new_scale("color") +
geom_point(data = embedding[embedding$`1`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `1`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('1', low = "#FFFFFF00", high = "#e2b398") +
new_scale("color") +
geom_point(data = embedding[embedding$`2`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `2`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('2', low = "#FFFFFF00", high = "#e2a2ca") +
xlab("UMAP 1") + ylab("UMAP 2") +
theme(axis.line = element_line(arrow = arrow(length = unit(0.2, "cm")))) +
scale_y_continuous(breaks = NULL) +
scale_x_continuous(breaks = NULL) +
theme(panel.background = element_blank(), panel.grid = element_blank(), legend.position = "bottom")
ggsave("ds2_PAtoAC_umap.svg",device = svg,plot = ggobj,height = 8,width = 8)
Idents(ds2_AC) <- ds2_AC$seurat_clusters
selected_features <- read.csv("./datatable/selected_features.csv", stringsAsFactors = F)
selected_features <- selected_features$x
AC_data <- get_data_table(ds2_AC, highvar = F, type = "data")
AC_data <- AC_data[selected_features,]
AC_label <- as.numeric(as.character(Idents(ds2_AC)))
colnames(AC_data) <- NULL
AC_train_data <- list(data = t(as(AC_data,"dgCMatrix")), label = AC_label)
AC_train <- xgb.DMatrix(data = AC_train_data$data,label = AC_train_data$label)
xgb_ACram <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2_AC))),
objective = "multi:softprob", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_ACram, AC_train, nrounds = 100, verbose = 0)
Idents(ds2_PA) <- factor(ds2_PA$seurat_clusters,levels = c(0,1,2))
PA_data <- get_data_table(ds2_PA, highvar = F, type = "data")
PA_data <- PA_data[selected_features,]
PA_label <- as.numeric(as.character(Idents(ds2_PA)))
colnames(PA_data) <- NULL
PA_test_data <- list(data = t(as(PA_data,"dgCMatrix")), label = PA_label)
PA_test <- xgb.DMatrix(data = PA_test_data$data,label = PA_test_data$label)
#预测结果
predict_prop_PA <-predict(bst_model, newdata = PA_test) %>%
matrix(nrow = length(levels(Idents(ds2_AC))),
ncol = ncol(ds2_PA), byrow = FALSE,
dimnames = list(levels(Idents(ds2_AC)),colnames(ds2_PA)))
PA_res <- apply(predict_prop_PA,2,func,rownames(predict_prop_PA))
confuse_matrix1 <- table(PA_test_data$label, PA_res, dnn=c("true","pre"))
sankey_plot(confuse_matrix1,session = "ACtoPA")
Idents(ds2_PA) <- factor(PA_res)
umapplot(ds2_PA)
embedding <- FetchData(object = ds2_PA, vars = c("UMAP_1", "UMAP_2"))
embedding <- cbind(embedding, t(predict_prop_PA))
ggobj <- ggplot() +
geom_point(data = embedding[embedding$`0`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `0`), shape=16, size = 2, alpha=0.5) +
scale_color_gradient('0', low = "#FFFFFF00", high = "#6dc0a6") +
new_scale("color") +
geom_point(data = embedding[embedding$`1`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `1`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('1', low = "#FFFFFF00", high = "#e2b398") +
new_scale("color") +
geom_point(data = embedding[embedding$`2`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `2`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('2', low = "#FFFFFF00", high = "#e2a2ca") +
new_scale("color") +
geom_point(data = embedding[embedding$`3`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `3`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('3', low = "#FFFFFF00", high = "#d1eba8") +
xlab("UMAP 1") + ylab("UMAP 2") +
theme(axis.line = element_line(arrow = arrow(length = unit(0.2, "cm")))) +
scale_y_continuous(breaks = NULL) +
scale_x_continuous(breaks = NULL) +
theme(panel.background = element_blank(), panel.grid = element_blank(), legend.position = "bottom")
ggsave("ds2_ACtoPA_umap.svg",device = svg,plot = ggobj,height = 8,width = 8)
Idents(ds0) <- ds0$seurat_clusters
ds0_data <- get_data_table(ds0, highvar = F, type = "data")
ds0_label <- as.numeric(as.character(Idents(ds0)))
index <- c(1:dim(ds0_data)[2]) %>% sample(ceiling(0.3*dim(ds0_data)[2]), replace = F, prob = NULL)
colnames(ds0_data) <- NULL
ds0_train_data <- list(data = t(as(ds0_data[,-index],"dgCMatrix")), label = ds0_label[-index])
ds0_test_data <- list(data = t(as(ds0_data[,index],"dgCMatrix")), label = ds0_label[index])
ds0_train <- xgb.DMatrix(data = ds0_train_data$data,label = ds0_train_data$label)
ds0_test <- xgb.DMatrix(data = ds0_test_data$data,label = ds0_test_data$label)
watchlist <- list(train = ds0_train, eval = ds0_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds0))),
objective = "multi:softprob", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, ds0_train, nrounds = 100, watchlist, verbose = 0)
saveRDS(bst_model, "ds0_model.rds")
eval_loss <- bst_model[["evaluation_log"]][["eval_mlogloss"]]
plot_ly(data.frame(eval_loss), x = c(1:100), y = eval_loss) %>%
add_trace(type = "scatter", mode = "markers+lines",
marker = list(color = "black", line = list(color = "#1E90FFC7", width = 1)),
line = list(color = "#1E90FF80", width = 2)) %>%
layout(xaxis = list(title = "epoch"),yaxis = list(title = "eval_mlogloss"))
importance <- xgb.importance(colnames(ds0_train), model = bst_model)
head(importance)
xgb.ggplot.importance(head(importance,20),n_clusters = 1) + theme_bw()+theme(
axis.title.x = element_text(size = 15), axis.text.x = element_text(size = 8, colour = "black"),
axis.title.y = element_text(size = 15), axis.text.y = element_text(size = 12, colour = "black"),
legend.text = element_text(size = 20), legend.title = element_blank(), panel.grid = element_blank())
write.csv(importance, "./datatable/ds0_features.csv", row.names = F)
multi_featureplot(head(importance,9)$Feature, ds0, labels = "")
Idents(ds2) <- ds2$seurat_clusters
temp <- get_data_table(ds2, highvar = F, type = "data")
ds2_data <- matrix(data=0, nrow = length(rownames(ds0_data)), ncol = length(colnames(temp)),
byrow = FALSE, dimnames = list(rownames(ds0_data),colnames(temp)))
for(i in intersect(rownames(ds2_data), rownames(temp))){
ds2_data[i,] <- temp[i,]
}
rm(temp)
ds2_label <- as.numeric(as.character(Idents(ds2)))
colnames(ds2_data) <- NULL
ds2_test_data <- list(data = t(as(ds2_data,"dgCMatrix")), label = ds2_label)
ds2_test <- xgb.DMatrix(data = ds2_test_data$data,label = ds2_test_data$label)
#预测结果
predict_ds2_test <- predict(bst_model, newdata = ds2_test)
predict_prop_ds2 <- matrix(data=predict_ds2_test, nrow = bst_model[["params"]][["num_class"]],
ncol = ncol(ds2), byrow = FALSE,
dimnames = list(c(0:(bst_model[["params"]][["num_class"]]-1)),colnames(ds2)))
## 得到分群结果
ds2_res <- apply(predict_prop_ds2,2,func,rownames(predict_prop_ds2))
adjustedRandIndex(ds2_res, ds2_test_data$label)
confuse_matrix1 <- table(ds2_test_data$label, ds2_res, dnn=c("true","pre"))
sankey_plot(confuse_matrix1,0:5,0:4,session = "ds0tods2")
Idents(ds2) <- factor(ds2_res,levels = c(0:5))
umapplot(ds2)
embedding <- FetchData(object = ds2, vars = c("UMAP_1", "UMAP_2"))
embedding <- cbind(embedding, t(predict_prop_ds2))
ggobj <- ggplot() +
geom_point(data = embedding[embedding$`0`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `0`), shape=16, size = 2, alpha=0.5) +
scale_color_gradient('0', low = "#FFFFFF00", high = "#6dc0a6") +
new_scale("color") +
geom_point(data = embedding[embedding$`1`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `1`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('1', low = "#FFFFFF00", high = "#e2b398") +
new_scale("color") +
geom_point(data = embedding[embedding$`2`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `2`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('2', low = "#FFFFFF00", high = "#e2a2ca") +
new_scale("color") +
geom_point(data = embedding[embedding$`3`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `3`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('3', low = "#FFFFFF00", high = "#d1eba8") +
new_scale("color") +
geom_point(data = embedding[embedding$`4`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `4`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('4', low = "#FFFFFF00", high = "#b1d6fb") +
new_scale("color") +
geom_point(data = embedding[embedding$`5`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `5`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('5', low = "#FFFFFF00", high = "#fd9999") +
xlab("UMAP 1") + ylab("UMAP 2") +
theme(axis.line = element_line(arrow = arrow(length = unit(0.2, "cm")))) +
scale_y_continuous(breaks = NULL) +
scale_x_continuous(breaks = NULL) +
theme(panel.background = element_blank(), panel.grid = element_blank(), legend.position = "bottom")
ggsave("ds0tods2umap.svg",device = svg,plot = ggobj,height = 8,width = 8)
Idents(ds1) <- ds1$seurat_clusters
temp <- get_data_table(ds1, highvar = F, type = "data")
ds1_data <- matrix(data=0, nrow = length(rownames(ds0_data)), ncol = length(colnames(temp)),
byrow = FALSE, dimnames = list(rownames(ds0_data),colnames(temp)))
for(i in intersect(rownames(ds1_data), rownames(temp))){
ds1_data[i,] <- temp[i,]
}
rm(temp)
ds1_label <- as.numeric(as.character(Idents(ds1)))
colnames(ds1_data) <- NULL
ds1_test_data <- list(data = t(as(ds1_data,"dgCMatrix")), label = ds1_label)
ds1_test <- xgb.DMatrix(data = ds1_test_data$data,label = ds1_test_data$label)
#预测结果
predict_ds1_test <- predict(bst_model, newdata = ds1_test)
predict_prop_ds1 <- matrix(data=predict_ds1_test, nrow = bst_model[["params"]][["num_class"]],
ncol = ncol(ds1), byrow = FALSE,
dimnames = list(c(0:(bst_model[["params"]][["num_class"]]-1)),colnames(ds1)))
## 得到分群结果
ds1_res <- apply(predict_prop_ds1,2,func,rownames(predict_prop_ds1))
adjustedRandIndex(ds1_test_data$label, ds1_res)
Idents(ds1) <- factor(ds1_res,levels = c(0:5))
umapplot(ds1)
# umapplot(ds1,group.by = "Classification1")
confuse_matrix <- table(ds1_test_data$label, ds1_res, dnn=c("true","pre"))
sankey_plot(confuse_matrix,c(0:4),c(0:4),session = "ds0tods1")
embedding <- FetchData(object = ds1, vars = c("UMAP_1", "UMAP_2"))
embedding <- cbind(embedding, t(predict_prop_ds1))
ggobj <- ggplot() +
geom_point(data = embedding[embedding$`0`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `0`), shape=16, size = 2, alpha=0.5) +
scale_color_gradient('0', low = "#FFFFFF00", high = "#6dc0a6") +
new_scale("color") +
geom_point(data = embedding[embedding$`1`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `1`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('1', low = "#FFFFFF00", high = "#e2b398") +
new_scale("color") +
geom_point(data = embedding[embedding$`2`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `2`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('2', low = "#FFFFFF00", high = "#e2a2ca") +
new_scale("color") +
geom_point(data = embedding[embedding$`3`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `3`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('3', low = "#FFFFFF00", high = "#d1eba8") +
new_scale("color") +
geom_point(data = embedding[embedding$`4`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `4`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('4', low = "#FFFFFF00", high = "#b1d6fb") +
new_scale("color") +
geom_point(data = embedding[embedding$`5`>0.1,],
aes(x = UMAP_1, y = UMAP_2, color = `5`),shape=16, size = 2, alpha=0.5) +
scale_color_gradient('5', low = "#FFFFFF00", high = "#fd9999") +
xlab("UMAP 1") + ylab("UMAP 2") +
theme(axis.line = element_line(arrow = arrow(length = unit(0.2, "cm")))) +
scale_y_continuous(breaks = NULL) +
scale_x_continuous(breaks = NULL) +
theme(panel.background = element_blank(), panel.grid = element_blank(), legend.position = "bottom")
ggsave("ds0tods1umap.svg",device = svg,plot = ggobj,height = 8,width = 8)
lym_ds2 <- readRDS("./lym_ds2.rds")
Idents(lym_ds2) <- lym_ds2$conditions
lym_ds2_AC <- subset(lym_ds2, idents = "AC")
lym_ds2_PA <- subset(lym_ds2, idents = "PA")
set.seed(7)
lym_PA_data <- get_data_table(lym_ds2_PA, highvar = T, type = "data")
lym_AC_data <- get_data_table(lym_ds2_AC, highvar = T, type = "data")
res <- list()
for(reso in seq(0.05,0.3,0.05))
{
lym_ds2_AC <- lym_ds2_AC %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = reso)
lym_ds2_PA <- lym_ds2_PA %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = reso)
lym_PA_label <- as.numeric(as.character(Idents(lym_ds2_PA)))
index <- c(1:dim(lym_PA_data)[2]) %>% sample(ceiling(0.3*dim(lym_PA_data)[2]), replace = F, prob = NULL)
colnames(lym_PA_data) <- NULL
lym_PA_train_data <- list(data = t(as(lym_PA_data[,-index],"dgCMatrix")), label = lym_PA_label[-index])
lym_PA_test_data <- list(data = t(as(lym_PA_data[,index],"dgCMatrix")), label = lym_PA_label[index])
lym_PA_train <- xgb.DMatrix(data = lym_PA_train_data$data,label = lym_PA_train_data$label)
lym_PA_test <- xgb.DMatrix(data = lym_PA_test_data$data,label = lym_PA_test_data$label)
watchlist <- list(train = lym_PA_train, eval = lym_PA_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(lym_ds2_PA))),
objective = "multi:softmax", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, lym_PA_train, nrounds = 100, watchlist, verbose = 0)
lym_AC_label <- as.numeric(as.character(Idents(lym_ds2_AC)))
colnames(lym_AC_data) <- NULL
lym_AC_test_data <- list(data = t(as(lym_AC_data,"dgCMatrix")), label = lym_AC_label)
lym_AC_test <- xgb.DMatrix(data = lym_AC_test_data$data,label = lym_AC_test_data$label)
predict_lym_AC_test <- round(predict(bst_model, newdata = lym_AC_test))
res <- append(x = res,values = adjustedRandIndex(predict_lym_AC_test, lym_AC_test_data$label))
}
res
ds2 <- readRDS("./ds2.rds")
Idents( ds2) <- ds2$conditions
ds2_AC <- subset( ds2, idents = "AC")
ds2_PA <- subset( ds2, idents = "PA")
set.seed(7)
PA_data <- get_data_table( ds2_PA, highvar = T, type = "data")
AC_data <- get_data_table( ds2_AC, highvar = T, type = "data")
ds2_res <- list()
for(reso in seq(0.05,0.3,0.05))
{
ds2_AC <- ds2_AC %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = reso)
ds2_PA <- ds2_PA %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = reso)
PA_label <- as.numeric(as.character(Idents( ds2_PA)))
index <- c(1:dim( PA_data)[2]) %>% sample(ceiling(0.3*dim( PA_data)[2]), replace = F, prob = NULL)
colnames( PA_data) <- NULL
PA_train_data <- list(data = t(as( PA_data[,-index],"dgCMatrix")), label = PA_label[-index])
PA_test_data <- list(data = t(as( PA_data[,index],"dgCMatrix")), label = PA_label[index])
PA_train <- xgb.DMatrix(data = PA_train_data$data,label = PA_train_data$label)
PA_test <- xgb.DMatrix(data = PA_test_data$data,label = PA_test_data$label)
watchlist <- list(train = PA_train, eval = PA_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents( ds2_PA))),
objective = "multi:softmax", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, PA_train, nrounds = 100, watchlist, verbose = 0)
AC_label <- as.numeric(as.character(Idents( ds2_AC)))
colnames( AC_data) <- NULL
AC_test_data <- list(data = t(as( AC_data,"dgCMatrix")), label = AC_label)
AC_test <- xgb.DMatrix(data = AC_test_data$data,label = AC_test_data$label)
predict_AC_test <- round(predict(bst_model, newdata = AC_test))
ds2_res <- append(x = ds2_res,values = adjustedRandIndex(predict_AC_test, AC_test_data$label))
}
ds2_res
plot_ly(data, x = ~resolution) %>%
add_trace(y = ~lym_ARI, name = 'lym', mode = 'lines+markers',size = 4, markers = list(color = "#158aff"),
line = list(color = '#b1d6fb', width = 4), type = 'scatter') %>%
add_trace(y = ~SMC_ARI, name = 'SMC', mode = 'lines+markers',size = 4, markers = list(color = "#ff2121"),
line = list(color = '#e2a2ca', width = 4), type = 'scatter')%>%
layout(xaxis = list(zerolinecolor = '#ffffff', zerolinewidth = 2, range = c(0, 0.32),
gridcolor = 'ffff', dtick=0.05, title = "resolution"),
yaxis = list(zerolinecolor = '#ffffff', zerolinewidth = 2, range = c(0.2, 0.9),
gridcolor = 'ffff', dtick=0.1, title = "ARI")) %>%
layout(font = list(family = "Arial", size = 20, color = "black"))
Warning: 'scatter' objects don't have these attributes: 'markers'
Valid attributes include:
'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isG [... truncated]
Warning: 'scatter' objects don't have these attributes: 'markers'
Valid attributes include:
'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isG [... truncated]
Warning: 'scatter' objects don't have these attributes: 'markers'
Valid attributes include:
'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isG [... truncated]
Warning: 'scatter' objects don't have these attributes: 'markers'
Valid attributes include:
'cliponaxis', 'connectgaps', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'fill', 'fillcolor', 'groupnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'orientation', 'selected', 'selectedpoints', 'showlegend', 'stackgaps', 'stackgroup', 'stream', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isG [... truncated]
library(DALEX)
library(modelStudio)
source("tianfengRwrappers.R")
ds2 <- readRDS("ds2.rds")
Idents(ds2) <- ds2$Classification1
ds2 <- RenameIdents(ds2, 'SMC1' = 0, 'Fibromyocyte' = 1, 'Pericyte' = 2, 'Fibroblast' = 3, 'SMC2' = 4)
ds2_data <- get_data_table(ds2, highvar = T, type = "data")
ds2_label <- as.numeric(as.character(Idents(ds2)))
index <- c(1:dim(ds2_data)[2]) %>% sample(ceiling(0.3*dim(ds2_data)[2]), replace = F, prob = NULL)
# colnames(ds2_data) <- NULL
ds2_train_data <- list(data = t(as(ds2_data[,-index],"dgCMatrix")), label = ds2_label[-index])
ds2_test_data <- list(data = t(as(ds2_data[,index],"dgCMatrix")), label = ds2_label[index])
ds2_train <- xgb.DMatrix(data = ds2_train_data$data,label = ds2_train_data$label)
ds2_test <- xgb.DMatrix(data = ds2_test_data$data,label = ds2_test_data$label)
watchlist <- list(train = ds2_train, eval = ds2_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2))),
objective = "multi:softprob", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, ds2_train, nrounds = 100, watchlist, verbose = 0)
# saveRDS(bst_model,"reduced_ds2model.rds")
# 建立解释器
explainer_xgb <- DALEX::explain(bst_model, data = ds2_train_data$data[1:200,], y = ds2_train_data$label[1:200], label = "XGBoost")
# modelStudio::modelStudio(explainer = explainer_xgb, new_observation = ds2_train_data$data[1:2,] )
p1 <- DALEX::variable_profile(explainer_xgb, variable = "ACKR1", type = "accumulated")
plot(p1)
p2 <- single_variable(explainer_xgb, rownames(ds2_data)[1:10], type = "pdp")
plot(p2)
# p3 <- variable_importance(explainer_xgb, variable =rownames(ds2_data)[1:10],loss_function = loss_root_mean_square)
reduced_ds2model <- readRDS("reduced_ds2model.rds")
p1 <- xgb.plot.tree(feature_names = rownames(ds2_data), model = reduced_ds2model,render = F)
DiagrammeR::render_graph(p1)
saveRDS(p1,"ds2_tree.rds")
p2 <- xgb.ggplot.shap.summary(data = ds2_train_data$data, features = rownames(ds2_data)[1:10], model = reduced_ds2model)
xgb.ggplot.deepness(model = reduced_ds2model, which = c("2x1", "max.depth", "med.depth", "med.weight"))
shapvalues <- xgb.plot.shap(data = ds2_train_data$data, features = rownames(ds2_data)[1:10], model = reduced_ds2model,plot = F)
library(SHAPforxgboost)
shap_contrib <- predict(reduced_ds2model, ds2_train_data$data, predcontrib = TRUE)
shap_contrib <- data.frame(shap_contrib[[4]]) ##提取SMC1对应的分类
BIAS0 <- shap_contrib[, ncol(shap_contrib)][1]
shap_contrib[,"BIAS"] <- NULL
imp <- colMeans(abs(shap_contrib))
mean_shap_score <- imp[order(imp, decreasing = T)]
shap_values<- list(shap_score = shap_contrib, mean_shap_score = mean_shap_score,
BIAS0 = BIAS0)
# To prepare the long-format data:
shap_long <- shap.prep(shap_contrib = shap_contrib, X_train = ds2_train_data$data,top_n = 30)
shap.plot.summary(shap_long, x_bound = 1.2, dilute = 30)
shap.plot.force_plot(plot_data,zoom_in_location = 800)
Data has N = 6675 | zoom in length is 150 at location 800.
shap.plot.dependence(data_long = shap_long, x= "CFH", color_feature = "CFH") + mytheme + geom_smooth(method = "loess")+ scale_colour_gradient(low="#1E90FF", high="#ff2121") +scale_x_continuous(limits = c(0,4))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 6 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 6 rows containing non-finite values (stat_smooth).
Warning: Removed 6 rows containing missing values (geom_point).
shap.plot.dependence(data_long = shap_long, data_int = shap_int[[2]],
x= "LUM", y = "LUM", color_feature = "LUM")
`geom_smooth()` using formula 'y ~ x'
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric, :
pseudoinverse used at -0.015223
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric, :
neighborhood radius 1.807
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric, :
reciprocal condition number 1.5904e-16
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric, :
There are other near singularities as well. 3.2104
data("iris")
X1 = as.matrix(iris[,-5])
mod1 = xgboost::xgboost(
data = X1, label = iris$Species, gamma = 0, eta = 1,
lambda = 0, nrounds = 1, verbose = FALSE)
# shap.values(model, X_dataset) returns the SHAP
# data matrix and ranked features by mean|SHAP|
shap_values <- shap.values(xgb_model = mod1, X_train = X1)
shap_values$mean_shap_score
Petal.Length Petal.Width Sepal.Length Sepal.Width
0.62935975 0.21664035 0.02910357 0.00000000
shap_values_iris <- shap_values$shap_score
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.